In [ ]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
In [ ]:
# plotly to be shown in notebook
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
# Annotations in visual studio
import plotly.io as pio
pio.renderers.default = "notebook"
In [ ]:
df = pd.read_pickle('output/crowdfunding_datapreprocessed.pkl')
df
Out[ ]:
Funded Requested Difference amount Covered amount Funded allocation Activity Sector Use Country code Country Region Currency Term in months Lenders Borrower genders Repayment interval Credit types Population Borrowers
0 300 300 0.0 100.0 0.130725 Fruits & Vegetables Food To buy seasonal, fresh fruits to sell. PK Pakistan Lahore PKR 12 12 female irregular bronze 2.294890e+05 1.0
1 575 575 0.0 100.0 0.250557 Rickshaw Transportation to repair and maintain the auto rickshaw used ... PK Pakistan Lahore PKR 11 14 female, female irregular bronze 2.294890e+05 2.0
2 150 150 0.0 100.0 0.010664 Transportation Transportation To repair their old cycle-van and buy another ... IN India Maynaguri INR 43 6 female bullet bronze 1.406632e+06 1.0
3 200 200 0.0 100.0 0.087150 Embroidery Arts to purchase an embroidery machine and a variet... PK Pakistan Lahore PKR 11 8 female irregular bronze 2.294890e+05 1.0
4 400 400 0.0 100.0 0.174300 Milk Sales Food to purchase one buffalo. PK Pakistan Abdul Hakeem PKR 14 16 female monthly bronze 2.294890e+05 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
646828 0 25 100.0 0.0 0.000000 Livestock Agriculture [True, u'para compara: cemento, arenya y ladri... PY Paraguay Concepción USD 13 0 female monthly bronze 7.305842e+03 1.0
646829 0 25 100.0 0.0 0.000000 Livestock Agriculture Reviewed loan use in english. PK Pakistan Attock PKR 13 0 female monthly bronze 2.294890e+05 1.0
646830 0 125 100.0 0.0 0.000000 Livestock Agriculture Pretend the flagged issue was addressed by KC. MX Mexico Iztacalco MXN 13 0 female, female monthly bronze 1.315628e+05 2.0
646831 0 875 100.0 0.0 0.000000 Livestock Agriculture Translated loan use to english. BO Bolivia La Paz BOB 13 0 female, female monthly bronze 1.199265e+04 2.0
646832 0 250 100.0 0.0 0.000000 Livestock Agriculture Reviewed loan use in english. GH Ghana Dansoman GHS 13 0 female monthly bronze 3.239545e+04 1.0

646833 rows × 19 columns

In [ ]:
df.columns
Out[ ]:
Index(['Funded', 'Requested', 'Difference amount', 'Covered amount',
       'Funded allocation', 'Activity', 'Sector', 'Use', 'Country code',
       'Country', 'Region', 'Currency', 'Term in months', 'Lenders',
       'Borrower genders', 'Repayment interval', 'Credit types', 'Population',
       'Borrowers'],
      dtype='object')
In [ ]:
# sns.pairplot(df)
In [ ]:
corr = df.corr(method='pearson')
corr
Out[ ]:
Funded Requested Difference amount Covered amount Funded allocation Term in months Lenders Population Borrowers
Funded 1.000000 0.944459 -0.082333 0.082333 0.190441 0.154157 0.847734 -0.052024 0.483999
Requested 0.944459 1.000000 0.119310 -0.119310 0.182599 0.191184 0.796730 -0.042510 0.465392
Difference amount -0.082333 0.119310 1.000000 -1.000000 -0.012559 0.137786 -0.088321 -0.014070 -0.017926
Covered amount 0.082333 -0.119310 -1.000000 1.000000 0.012559 -0.137786 0.088321 0.014070 0.017926
Funded allocation 0.190441 0.182599 -0.012559 0.012559 1.000000 0.054697 0.183461 -0.080867 0.064349
Term in months 0.154157 0.191184 0.137786 -0.137786 0.054697 1.000000 0.233675 0.244807 -0.146500
Lenders 0.847734 0.796730 -0.088321 0.088321 0.183461 0.233675 1.000000 -0.048289 0.291680
Population -0.052024 -0.042510 -0.014070 0.014070 -0.080867 0.244807 -0.048289 1.000000 -0.036753
Borrowers 0.483999 0.465392 -0.017926 0.017926 0.064349 -0.146500 0.291680 -0.036753 1.000000
In [ ]:
corr.columns
Out[ ]:
Index(['Funded', 'Requested', 'Difference amount', 'Covered amount',
       'Funded allocation', 'Term in months', 'Lenders', 'Population',
       'Borrowers'],
      dtype='object')

Heatmap¶

In [ ]:
fig = px.imshow(corr, aspect='auto',
                color_continuous_scale='Plasma', text_auto=True)

fig.update_traces(text=corr,
                  texttemplate="%{text:.2f}",
                  textfont={'size': 14}
                  )

fig.update_xaxes(# side="top",
                 tickfont=dict(size=14),
                 tickmode='array',
                 tickvals=corr.columns,
                 ticktext=corr.columns
                 )

fig.update_yaxes(tickmode='array',
                 ticktext=corr.columns,
                 tickvals=corr.columns,
                 tickfont=dict(size=14)
                 )

fig.update_layout(title={'text': "Overview of correlations",
                         'font_size': 16,
                         'y': 0.95,
                         'x': 0.55,
                         'xanchor': 'center',
                         'yanchor': 'top'},
                  autosize=False,
                  width=800,
                  height=600,
                  coloraxis=dict(colorbar=dict(title="Significance"))
                  )

fig.show()

Population Density¶

In [ ]:
pop_density_sel=pd.read_pickle('output/population.pkl')

Source for creation of the data base population: https://population.un.org/wpp/Download/Standard/CSV/

In [ ]:
pop_density_sel.dtypes
Out[ ]:
Country      object
PopTotal    float64
dtype: object
In [ ]:
pop_density_sel
Out[ ]:
Country PopTotal
0 Afghanistan 40754.385
1 Africa 1406728.760
2 African Group 1404902.167
3 African Union 1405528.335
4 African Union: Central Africa 168061.108
... ... ...
469 World 7953952.577
470 World Bank Regional Groups (developing only) 6679106.813
471 Yemen 31154.866
472 Zambia 19470.238
473 Zimbabwe 15331.426

474 rows × 2 columns

PopDensity¶

In [ ]:
pop_density_all = pd.read_csv('input/WPP2019_TotalPopulationBySex.csv')
pop_density_all
Out[ ]:
LocID Location VarID Variant Time MidPeriod PopMale PopFemale PopTotal PopDensity
0 4 Afghanistan 2 Medium 1950 1950.5 4099.243 3652.874 7752.117 11.874
1 4 Afghanistan 2 Medium 1951 1951.5 4134.756 3705.395 7840.151 12.009
2 4 Afghanistan 2 Medium 1952 1952.5 4174.450 3761.546 7935.996 12.156
3 4 Afghanistan 2 Medium 1953 1953.5 4218.336 3821.348 8039.684 12.315
4 4 Afghanistan 2 Medium 1954 1954.5 4266.484 3884.832 8151.316 12.486
... ... ... ... ... ... ... ... ... ... ...
280927 716 Zimbabwe 207 Lower 95 PI 2080 2080.5 10576.533 11255.983 21836.893 56.448
280928 716 Zimbabwe 207 Lower 95 PI 2085 2085.5 10293.349 11050.875 21355.988 55.205
280929 716 Zimbabwe 207 Lower 95 PI 2090 2090.5 9920.336 10767.709 20689.956 53.483
280930 716 Zimbabwe 207 Lower 95 PI 2095 2095.5 9503.711 10412.184 19892.080 51.421
280931 716 Zimbabwe 207 Lower 95 PI 2100 2100.5 9090.075 9996.105 19061.177 49.273

280932 rows × 10 columns

Source of the .csv file: https://population.un.org/wpp/Download/Standard/CSV/

In [ ]:
pop_density = pop_density_all.loc[pop_density_all.loc[:,'Time'] == 2022, ['Location', 'Time', 'PopDensity']]
pop_density 
Out[ ]:
Location Time PopDensity
72 Afghanistan 2022 62.424
153 Afghanistan 2022 62.610
234 Afghanistan 2022 62.295
315 Afghanistan 2022 62.872
396 Afghanistan 2022 61.211
... ... ... ...
280444 Zimbabwe 2022 39.056
280525 Zimbabwe 2022 40.046
280606 Zimbabwe 2022 39.584
280687 Zimbabwe 2022 39.719
280768 Zimbabwe 2022 39.335

2757 rows × 3 columns

In [ ]:
pop_density_drop = pop_density.drop_duplicates(subset=['Location'], ignore_index=True)
pop_density_drop.reset_index()
pop_density_drop
Out[ ]:
Location Time PopDensity
0 Afghanistan 2022 62.424
1 Africa 2022 47.447
2 African Group 2022 47.820
3 African Union 2022 47.412
4 African Union: Central Africa 2022 31.855
... ... ... ...
469 World 2022 61.140
470 World Bank Regional Groups (developing only) 2022 72.725
471 Yemen 2022 59.009
472 Zambia 2022 26.191
473 Zimbabwe 2022 39.631

474 rows × 3 columns

In [ ]:
pop_sel = pop_density_drop[['Location', 'PopDensity']]
pop_sel
Out[ ]:
Location PopDensity
0 Afghanistan 62.424
1 Africa 47.447
2 African Group 47.820
3 African Union 47.412
4 African Union: Central Africa 31.855
... ... ...
469 World 61.140
470 World Bank Regional Groups (developing only) 72.725
471 Yemen 59.009
472 Zambia 26.191
473 Zimbabwe 39.631

474 rows × 2 columns

In [ ]:
pop_sel_2 = pop_sel.copy()
In [ ]:
pop_sel_2.rename(mapper={'Location':'Country'}, axis = 1, inplace = True)
pop_sel_2.columns
Out[ ]:
Index(['Country', 'PopDensity'], dtype='object')
In [ ]:
pop_sel_2.set_index('Country')
Out[ ]:
PopDensity
Country
Afghanistan 62.424
Africa 47.447
African Group 47.820
African Union 47.412
African Union: Central Africa 31.855
... ...
World 61.140
World Bank Regional Groups (developing only) 72.725
Yemen 59.009
Zambia 26.191
Zimbabwe 39.631

474 rows × 1 columns

In [ ]:
data_population = pop_sel_2.to_pickle('output/population.pkl')
In [ ]:
loan_funded = df.groupby(['Country']).agg({'Country': 'size','Requested': sum, 'Funded': sum})
loan_funded
Out[ ]:
Country Requested Funded
Country
Afghanistan 2 14000 14000
Albania 1934 2666500 2490000
Armenia 8629 12913075 11184350
Azerbaijan 1942 2886350 2697225
Belize 124 113500 113500
... ... ... ...
Vietnam 9534 14154350 12928975
Virgin Islands 2 10000 0
Yemen 2313 1879950 1784075
Zambia 775 1205075 1142350
Zimbabwe 3973 3415900 3341225

86 rows × 3 columns

In [ ]:
loan_funded.rename(mapper={'Country':'Nr of projects'}, axis = 1, inplace = True)
loan_funded.columns
Out[ ]:
Index(['Nr of projects', 'Requested', 'Funded'], dtype='object')
In [ ]:
loan_funded
Out[ ]:
Nr of projects Requested Funded
Country
Afghanistan 2 14000 14000
Albania 1934 2666500 2490000
Armenia 8629 12913075 11184350
Azerbaijan 1942 2886350 2697225
Belize 124 113500 113500
... ... ... ...
Vietnam 9534 14154350 12928975
Virgin Islands 2 10000 0
Yemen 2313 1879950 1784075
Zambia 775 1205075 1142350
Zimbabwe 3973 3415900 3341225

86 rows × 3 columns

In [ ]:
loan_funded_pop_dens = loan_funded.merge(pop_sel_2, on='Country', how='left')
loan_funded_pop_dens
Out[ ]:
Country Nr of projects Requested Funded PopDensity
0 Afghanistan 2 14000 14000 62.424
1 Albania 1934 2666500 2490000 104.612
2 Armenia 8629 12913075 11184350 104.389
3 Azerbaijan 1942 2886350 2697225 124.612
4 Belize 124 113500 113500 18.071
... ... ... ... ... ...
81 Vietnam 9534 14154350 12928975 NaN
82 Virgin Islands 2 10000 0 NaN
83 Yemen 2313 1879950 1784075 59.009
84 Zambia 775 1205075 1142350 26.191
85 Zimbabwe 3973 3415900 3341225 39.631

86 rows × 5 columns

In [ ]:
nan_sum = loan_funded_pop_dens.isna().sum()           
nan_sum[nan_sum > 0].sort_values(ascending=False)
Out[ ]:
PopDensity    10
dtype: int64
In [ ]:
m_v = loan_funded_pop_dens.loc[loan_funded_pop_dens.loc[:,'PopDensity'].isnull()]
m_v
Out[ ]:
Country Nr of projects Requested Funded PopDensity
7 Bolivia 8799 19837925 18275575 NaN
18 Cote D'Ivoire 1 50000 50000 NaN
35 Kosovo 1419 1947750 1778600 NaN
46 Moldova 348 718725 686850 NaN
49 Myanmar (Burma) 1870 3242300 3035850 NaN
55 Palestine 8160 13063325 12024450 NaN
72 Tanzania 5214 6826200 6516550 NaN
79 United States 6092 31495375 23158540 NaN
81 Vietnam 9534 14154350 12928975 NaN
82 Virgin Islands 2 10000 0 NaN
In [ ]:
countries = m_v['Country']
densities = [11.102,  87.24, 162.5, 122.167, 84.537, 887.964, 11.027, 16.977, 36.601, 319.133, 204.04]
combi = dict(zip(countries, densities))

for k,v in combi.items():
    loan_funded_pop_dens.loc[(loan_funded_pop_dens['Country'] == k) & (loan_funded_pop_dens['PopDensity'].isnull()), 'PopDensity'] = v
In [ ]:
loan_funded_pop_dens.isnull().sum()
Out[ ]:
Country           0
Nr of projects    0
Requested         0
Funded            0
PopDensity        0
dtype: int64
In [ ]:
loan_funded_pop_dens.loc[loan_funded_pop_dens['Country'] == 'Bolivia']
Out[ ]:
Country Nr of projects Requested Funded PopDensity
7 Bolivia 8799 19837925 18275575 11.102
In [ ]:
loan_funded_pop_dens.rename(mapper={'PopDensity':'Population density'}, axis = 1, inplace = True)
loan_funded_pop_dens.columns
Out[ ]:
Index(['Country', 'Nr of projects', 'Requested', 'Funded',
       'Population density'],
      dtype='object')

GeoMaps¶

In [ ]:
loan_funded_pop_dens['Covered amount'] = (loan_funded_pop_dens['Funded']/loan_funded_pop_dens['Requested'])*100
loan_funded_pop_dens
Out[ ]:
Country Nr of projects Requested Funded Population density Covered amount
0 Afghanistan 2 14000 14000 62.424 100.000000
1 Albania 1934 2666500 2490000 104.612 93.380836
2 Armenia 8629 12913075 11184350 104.389 86.612600
3 Azerbaijan 1942 2886350 2697225 124.612 93.447607
4 Belize 124 113500 113500 18.071 100.000000
... ... ... ... ... ... ...
81 Vietnam 9534 14154350 12928975 36.601 91.342767
82 Virgin Islands 2 10000 0 319.133 0.000000
83 Yemen 2313 1879950 1784075 59.009 94.900130
84 Zambia 775 1205075 1142350 26.191 94.794930
85 Zimbabwe 3973 3415900 3341225 39.631 97.813900

86 rows × 6 columns

In [ ]:
loan_funded_pop_dens['Project amounts'] = (loan_funded_pop_dens['Nr of projects']/loan_funded_pop_dens['Population density'])*100
loan_funded_pop_dens
Out[ ]:
Country Nr of projects Requested Funded Population density Covered amount Project amounts
0 Afghanistan 2 14000 14000 62.424 100.000000 3.203896
1 Albania 1934 2666500 2490000 104.612 93.380836 1848.736283
2 Armenia 8629 12913075 11184350 104.389 86.612600 8266.196630
3 Azerbaijan 1942 2886350 2697225 124.612 93.447607 1558.437390
4 Belize 124 113500 113500 18.071 100.000000 686.182281
... ... ... ... ... ... ... ...
81 Vietnam 9534 14154350 12928975 36.601 91.342767 26048.468621
82 Virgin Islands 2 10000 0 319.133 0.000000 0.626698
83 Yemen 2313 1879950 1784075 59.009 94.900130 3919.741056
84 Zambia 775 1205075 1142350 26.191 94.794930 2959.031728
85 Zimbabwe 3973 3415900 3341225 39.631 97.813900 10024.980445

86 rows × 7 columns

In [ ]:
loan_funded_pop_dens['Funded allocation'] = (loan_funded_pop_dens['Funded'] / loan_funded_pop_dens['Population density'])*100
loan_funded_pop_dens
Out[ ]:
Country Nr of projects Requested Funded Population density Covered amount Project amounts Funded allocation
0 Afghanistan 2 14000 14000 62.424 100.000000 3.203896 2.242727e+04
1 Albania 1934 2666500 2490000 104.612 93.380836 1848.736283 2.380224e+06
2 Armenia 8629 12913075 11184350 104.389 86.612600 8266.196630 1.071411e+07
3 Azerbaijan 1942 2886350 2697225 124.612 93.447607 1558.437390 2.164499e+06
4 Belize 124 113500 113500 18.071 100.000000 686.182281 6.280781e+05
... ... ... ... ... ... ... ... ...
81 Vietnam 9534 14154350 12928975 36.601 91.342767 26048.468621 3.532410e+07
82 Virgin Islands 2 10000 0 319.133 0.000000 0.626698 0.000000e+00
83 Yemen 2313 1879950 1784075 59.009 94.900130 3919.741056 3.023395e+06
84 Zambia 775 1205075 1142350 26.191 94.794930 2959.031728 4.361613e+06
85 Zimbabwe 3973 3415900 3341225 39.631 97.813900 10024.980445 8.430837e+06

86 rows × 8 columns

Geomap

In [ ]:
fig = px.scatter_geo(loan_funded_pop_dens, locations='Country', color='Funded', locationmode='country names', size='Funded allocation') # PopDensity
fig.show()
In [ ]:
fig = px.scatter_geo(loan_funded_pop_dens, locations='Country', color='Nr of projects', locationmode='country names', size='Project amounts')
fig.show()
In [ ]:
corr = loan_funded_pop_dens.corr(method='spearman')
corr
Out[ ]:
Nr of projects Requested Funded Population density Covered amount Project amounts Funded allocation
Nr of projects 1.000000 0.937674 0.937863 0.144145 -0.432759 0.848629 0.774000
Requested 0.937674 1.000000 0.999321 0.116034 -0.441677 0.817048 0.833690
Funded 0.937863 0.999321 1.000000 0.113629 -0.424318 0.817520 0.834407
Population density 0.144145 0.116034 0.113629 1.000000 -0.185919 -0.307657 -0.371283
Covered amount -0.432759 -0.441677 -0.424318 -0.185919 1.000000 -0.326698 -0.313282
Project amounts 0.848629 0.817048 0.817520 -0.307657 -0.326698 1.000000 0.949262
Funded allocation 0.774000 0.833690 0.834407 -0.371283 -0.313282 0.949262 1.000000
In [ ]:
loan_funded = df.groupby(['Country', 'Credit types']).agg({'Country': 'size','Covered amount': sum, 'Funded allocation': sum})
loan_funded
Out[ ]:
Country Covered amount Funded allocation
Country Credit types
Afghanistan bronze 2 200.00000 34.352132
silver 0 0.00000 0.000000
gold 0 0.00000 0.000000
platinum 0 0.00000 0.000000
Albania bronze 1933 182550.71875 85124.850944
... ... ... ... ...
Zambia platinum 0 0.00000 0.000000
Zimbabwe bronze 3972 392087.71875 21467.181200
silver 1 100.00000 326.127524
gold 0 0.00000 0.000000
platinum 0 0.00000 0.000000

344 rows × 3 columns

In [ ]:
loan_funded = df.groupby(['Country', 'Credit types']).agg({'Country': 'size','Requested': sum, 'Funded': sum})
loan_funded
Out[ ]:
Country Requested Funded
Country Credit types
Afghanistan bronze 2 14000 14000
silver 0 0 0
gold 0 0 0
platinum 0 0 0
Albania bronze 1933 2616500 2440000
... ... ... ... ...
Zambia platinum 0 0 0
Zimbabwe bronze 3972 3365900 3291225
silver 1 50000 50000
gold 0 0 0
platinum 0 0 0

344 rows × 3 columns

In [ ]:
loan_funded.rename(mapper={'Country':'Nr of projects'}, axis = 1, inplace = True)
loan_funded.columns
Out[ ]:
Index(['Nr of projects', 'Requested', 'Funded'], dtype='object')
In [ ]:
loan_funded_2=loan_funded.reset_index()
In [ ]:
loan_funded_2
Out[ ]:
Country Credit types Nr of projects Requested Funded
0 Afghanistan bronze 2 14000 14000
1 Afghanistan silver 0 0 0
2 Afghanistan gold 0 0 0
3 Afghanistan platinum 0 0 0
4 Albania bronze 1933 2616500 2440000
... ... ... ... ... ...
339 Zambia platinum 0 0 0
340 Zimbabwe bronze 3972 3365900 3291225
341 Zimbabwe silver 1 50000 50000
342 Zimbabwe gold 0 0 0
343 Zimbabwe platinum 0 0 0

344 rows × 5 columns

In [ ]:
pop_density_sel=pd.read_pickle('output/population.pkl')
In [ ]:
loan_funded = loan_funded_2.merge(pop_density_sel, on='Country', how='left')
loan_funded
Out[ ]:
Country Credit types Nr of projects Requested Funded PopDensity
0 Afghanistan bronze 2 14000 14000 62.424
1 Afghanistan silver 0 0 0 62.424
2 Afghanistan gold 0 0 0 62.424
3 Afghanistan platinum 0 0 0 62.424
4 Albania bronze 1933 2616500 2440000 104.612
... ... ... ... ... ... ...
339 Zambia platinum 0 0 0 26.191
340 Zimbabwe bronze 3972 3365900 3291225 39.631
341 Zimbabwe silver 1 50000 50000 39.631
342 Zimbabwe gold 0 0 0 39.631
343 Zimbabwe platinum 0 0 0 39.631

344 rows × 6 columns

In [ ]:
countries = m_v['Country']
densities = [11.102,  87.24, 162.5, 122.167, 84.537, 887.964, 11.027, 16.977, 36.601, 319.133, 204.04]
combi = dict(zip(countries, densities))

for k,v in combi.items():
    loan_funded_pop_dens.loc[(loan_funded['Country'] == k) & (loan_funded['PopDensity'].isnull()), 'PopDensity'] = v
In [ ]:
loan_funded['Requested'].sum()
Out[ ]:
560312125
In [ ]:
loan_funded['Funded'].sum()
Out[ ]:
522540915
In [ ]:
loan_funded['Funded allocation'] = (loan_funded['Funded'] / loan_funded['PopDensity'])*100
loan_funded
Out[ ]:
Country Credit types Nr of projects Requested Funded PopDensity Funded allocation
0 Afghanistan bronze 2 14000 14000 62.424 2.242727e+04
1 Afghanistan silver 0 0 0 62.424 0.000000e+00
2 Afghanistan gold 0 0 0 62.424 0.000000e+00
3 Afghanistan platinum 0 0 0 62.424 0.000000e+00
4 Albania bronze 1933 2616500 2440000 104.612 2.332428e+06
... ... ... ... ... ... ... ...
339 Zambia platinum 0 0 0 26.191 0.000000e+00
340 Zimbabwe bronze 3972 3365900 3291225 39.631 8.304673e+06
341 Zimbabwe silver 1 50000 50000 39.631 1.261639e+05
342 Zimbabwe gold 0 0 0 39.631 0.000000e+00
343 Zimbabwe platinum 0 0 0 39.631 0.000000e+00

344 rows × 7 columns

In [ ]:
loan_funded['Project allocation'] = (loan_funded['Nr of projects'] / loan_funded['PopDensity'])*100
loan_funded
Out[ ]:
Country Credit types Nr of projects Requested Funded PopDensity Funded allocation Project allocation
0 Afghanistan bronze 2 14000 14000 62.424 2.242727e+04 3.203896
1 Afghanistan silver 0 0 0 62.424 0.000000e+00 0.000000
2 Afghanistan gold 0 0 0 62.424 0.000000e+00 0.000000
3 Afghanistan platinum 0 0 0 62.424 0.000000e+00 0.000000
4 Albania bronze 1933 2616500 2440000 104.612 2.332428e+06 1847.780369
... ... ... ... ... ... ... ... ...
339 Zambia platinum 0 0 0 26.191 0.000000e+00 0.000000
340 Zimbabwe bronze 3972 3365900 3291225 39.631 8.304673e+06 10022.457167
341 Zimbabwe silver 1 50000 50000 39.631 1.261639e+05 2.523277
342 Zimbabwe gold 0 0 0 39.631 0.000000e+00 0.000000
343 Zimbabwe platinum 0 0 0 39.631 0.000000e+00 0.000000

344 rows × 8 columns

In [ ]:
loan_funded['Covered amount'] = (loan_funded['Funded'] / loan_funded['Requested'])*100
loan_funded
Out[ ]:
Country Credit types Nr of projects Requested Funded PopDensity Funded allocation Project allocation Covered amount
0 Afghanistan bronze 2 14000 14000 62.424 2.242727e+04 3.203896 100.000000
1 Afghanistan silver 0 0 0 62.424 0.000000e+00 0.000000 NaN
2 Afghanistan gold 0 0 0 62.424 0.000000e+00 0.000000 NaN
3 Afghanistan platinum 0 0 0 62.424 0.000000e+00 0.000000 NaN
4 Albania bronze 1933 2616500 2440000 104.612 2.332428e+06 1847.780369 93.254347
... ... ... ... ... ... ... ... ... ...
339 Zambia platinum 0 0 0 26.191 0.000000e+00 0.000000 NaN
340 Zimbabwe bronze 3972 3365900 3291225 39.631 8.304673e+06 10022.457167 97.781425
341 Zimbabwe silver 1 50000 50000 39.631 1.261639e+05 2.523277 100.000000
342 Zimbabwe gold 0 0 0 39.631 0.000000e+00 0.000000 NaN
343 Zimbabwe platinum 0 0 0 39.631 0.000000e+00 0.000000 NaN

344 rows × 9 columns

Scatterplot¶

In [ ]:
my_view = px.scatter(data_frame=loan_funded,
                     x='Requested',
                     y='Funded',
                     color='Credit types',
                     size='Nr of projects',
                    # color_discrete_map={'bronze':'blue','silver':'red','gold':'orange','platinum':'green'}
                     )
my_view.update_layout(title={'text':'Credit types', 
                        'y':0.93,
                        'x':0.5,
                        'xanchor': 'center',
                        'yanchor': 'top'})

my_view.show()
In [ ]:
my_view = px.scatter(data_frame=loan_funded,
                     x='Covered amount',
                     y='Funded allocation',
                     color='Credit types',
                     size='Nr of projects',
                     color_discrete_map={'bronze':'blue','silver':'red','gold':'orange','platinum':'green'})
my_view.update_layout(title={'text':'Credit types in raport to population density', 
                        'y':0.93,
                        'x':0.5,
                        'xanchor': 'center',
                        'yanchor': 'top'})

my_view.show()

Sector Barplot¶

In [ ]:
sector_order = df.groupby('Sector', as_index = False).agg(Sectors=('Sector','size')).sort_values(by='Sectors', ascending=False)
sector_order
Out[ ]:
Sector Sectors
0 Agriculture 174624
6 Food 135359
11 Retail 123186
12 Services 44844
2 Clothing 32554
8 Housing 30155
4 Education 30128
10 Personal Use 25533
13 Transportation 15392
1 Arts 11938
7 Health 9190
3 Construction 6263
9 Manufacturing 6204
5 Entertainment 829
14 Wholesale 634
In [ ]:
fig = px.bar(sector_order, y='Sectors', x='Sector', text_auto='.2s')

fig.update_layout(title={'text': 'Types of sectors of interest', 
                        'y':0.93,
                        'x':0.5,
                        'xanchor': 'center',
                        'yanchor': 'top'})

fig.update_yaxes(title_text = 'Number of projects\' topics')
fig.show()

Wordcloud¶

In [ ]:
# conda install -c conda-forge wordcloud
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import string
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import codecs

nltk.download('punkt')
nltk.download('stopwords')
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/gabrielarotari/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/gabrielarotari/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Out[ ]:
True
In [ ]:
df_w = df.copy()
In [ ]:
df_w_agr= df_w.loc[df_w['Sector'] == 'Agriculture', 'Use'].astype('str')
In [ ]:
df_w_agr
Out[ ]:
6         To purchase a dairy cow and start a milk produ...
14           to expand her existing  poultry farm business.
25        to purchase potato seeds and fertilizers for g...
28        to purchase potato seeds and fertilizers for g...
29        to purchase potato seeds and fertilizer for fa...
                                ...                        
646828    [True, u'para compara: cemento, arenya y ladri...
646829                        Reviewed loan use in english.
646830       Pretend the flagged issue was addressed by KC.
646831                      Translated loan use to english.
646832                        Reviewed loan use in english.
Name: Use, Length: 174624, dtype: object
In [ ]:
df_w_agr.isnull().sum()
Out[ ]:
0
In [ ]:
joined_rows = ' '.join(df_w_agr)
#joined_rows
In [ ]:
for char in '.:,)!-[(]2':
    if char in joined_rows:
        joined_rows = joined_rows.replace(char, ' ')
In [ ]:
words = joined_rows.split(' ')
#words 
In [ ]:
words_final = []

for word in words:
    if word != "":
        words_final.append(word)
In [ ]:
import spacy
en = spacy.load('en_core_web_sm')
stopwords = en.Defaults.stop_words

cleaning = []

for token in words_final:
    if token not in stopwords:
        cleaning.append(token)
In [ ]:
from collections import Counter
dict_count = Counter(cleaning)
# dict_count
dict_count.most_common(5)
Out[ ]:
[('buy', 108464),
 ('purchase', 47424),
 ('fertilizer', 33566),
 ('seeds', 30087),
 ('farm', 24925)]
In [ ]:
my_wordcloud_final = WordCloud(width=800, height=800,
                               background_color='white',
                               max_words=300, 
                               # stopwords=STOP_WORDS,
                               min_font_size=12).generate_from_frequencies(dict_count)


plt.figure(figsize=(10, 10), facecolor=None)
plt.imshow(my_wordcloud_final, interpolation="bilinear")  # image displayed more smoothly
plt.axis('off')
plt.tight_layout(pad=10)
plt.show()